import pandas as pd
from glob import glob
import gzip
import json
import os
import openpyxl

def get_papa_mama():
    s = """
    Baba, Papa, Mummy, Mumma, mom, dad, daddy, father, mother, Abba, pita
    Dad, Daddy, Father, Papa, Pops, Pop, Dada, Pater, Papi, Poppa, Poppy, dadi
    Pa, Da, Ma, Maa
    Mom, Mommy, Mother, Mama, Mum, Mummy, Mumma, Mumsy, Mumzie
    Appa, Thaatha, Amma, Ammi
    Thatha, Pitta, Ammi, Mummy
    Baba, Pitta, Mummy
    Baba, Papa, Mummy
    Bapu, Papa, Mummy
    Pitaji, Papa, Mummy
    Baba, Papa, Mummy
    Thatha, Papa, Amma, Mummy
    Achan, Papa, Amma, Mummy

    beta, Beti, Patni, Pyar, Baby, Dear, honey, Sweetheart, Mamaji, Pati, Mausi
    Son, Sonny
    Daughter
    Wife, Husband
    Uncle, aunt
    beloved, honeys, love, lovey, my, me, myself, mine, own, loveyou, missyou, brother, sister, brotherhood, sisterhood, best friend
    """
    s = s.strip().split("\n")
    papa_mama = []
    for item in s:
        papa_mama.extend([x.strip().lower() for x in item.split(',')])
    return set(papa_mama)


def load_data():
    # f_out = open("./trace.tsv", 'w', encoding="utf-8")
    papa_mama = get_papa_mama()
    dict_hit_papa_mama = {}
    dir = r'/Users/wangjianwen/Downloads/addressbook'
    cnt = 0
    for fname in glob(f"{dir}/*.gz"):
        cnt += 1
        biz_no = fname.split(".")[0].split("\\")[-1]
        # f_out.write(f">>>>>>>>>>>>>>bizno:{biz_no}\n")
        add_object = json.load(gzip.open(fname, 'rb'))
        for item in add_object:
            k = item['display_name']
            if k.lower() in papa_mama:
                dict_hit_papa_mama[biz_no] = 1
    print(cnt, len(dict_hit_papa_mama) / cnt)


def loan_data1(directory):
    # 创建一个新的Excel工作簿
    workbook = openpyxl.Workbook()
    # 获取活动的工作表
    sheet = workbook.active

    for root, dirs, files in os.walk(directory):
        # print("当前文件夹:", root)
        for file in files:
            file_path = os.path.join(root, file)
            print("文件:", file_path)
            data = json.load(open(file_path, 'r'))
            print(data)

            # 写入Excel
            for row_idx, item in enumerate(data, start=1):
                print(item['display_name'], item['number'])
                for col_idx, value in enumerate(item.values(), start=1):
                    sheet.cell(row=row_idx, column=col_idx, value=value)

    # 保存Excel文件
    workbook.save('output.xlsx')


def loan_data2(directory):
    for root, dirs, files in os.walk(directory):
        # 创建一个空的 DataFrame
        df = pd.DataFrame()
        # 将 DataFrame 写入 Excel 文件
        df.to_excel('output.xlsx', index=False)

        print("当前文件夹:", root)
        for file in files:
            file_path = os.path.join(root, file)
            print("文件:", file_path)
            data = json.load(open(file_path, 'r'))
            print(data)

            # 创建一个 ExcelWriter 对象，指定文件名
            with pd.ExcelWriter('output.xlsx', mode='a', engine='openpyxl') as writer:
                # 将 JSON 数据转换为 DataFrame
                df = pd.DataFrame(data)

                # 将 DataFrame 写入 Excel 文件，指定 sheet_name 为 'Sheet1'，index 参数为 False，表示不写入行索引
                df.to_excel(writer, sheet_name=file)
            print("文件:", file_path, "写入完毕")
            # 将 JSON 数据转换为 DataFrame
            # df = pd.DataFrame(data)
            # 将 DataFrame 写入 Excel 文件
            # df.to_excel('output1.xlsx')


if __name__ == "__main__":
    # load_data()
    # loan_data1('/Users/wangjianwen/Downloads/addressbook')
    loan_data2('/Users/wangjianwen/Downloads/addressbook')